"""Metric functions for Simulation C.

Provides utilities to compute slopes of log attenuation and collapse
metrics (R² and RMSE) for the measured visibilities relative to the
predicted exponential law.
"""

from __future__ import annotations

import numpy as np
import pandas as pd
from typing import Sequence, Tuple, Dict, Iterable

def bootstrap_ci(data: Iterable[float], ci: float = 0.68, n_bootstrap: int = 1000) -> Tuple[float, float]:
    """Return a central confidence interval around the mean of ``data`` via bootstrapping.

    Parameters
    ----------
    data : iterable of floats
        The sample data from which to estimate the confidence interval.
    ci : float, default 0.68
        Fraction of mass inside the confidence interval, in ``[0, 1]``.
        For 68 % CI use 0.68; for 95 % use 0.95.
    n_bootstrap : int, default 1000
        Number of bootstrap resamples to draw.  Must be at least 1.

    Returns
    -------
    (low, high) : tuple of floats
        Lower and upper bounds of the central confidence interval, with
        ``low <= high``.  If the input data is empty the interval is
        (0.0, 0.0).

    Raises
    ------
    ValueError
        If ``data`` is non-empty and ``n_bootstrap`` is smaller than 1 or
        ``ci`` lies outside ``[0, 1]`` (NaN included).

    Notes
    -----
    The input array is resampled with replacement ``n_bootstrap`` times,
    the mean of each resample is computed, and the bounds are read from
    the sorted resample means at the ranks matching the requested tails.
    A fixed RNG seed is used to ensure reproducibility across runs.
    """
    arr = np.asarray(list(data), dtype=float)
    # Empty input short-circuits before any argument validation.
    if arr.size == 0:
        return (0.0, 0.0)
    if n_bootstrap < 1:
        raise ValueError(f"n_bootstrap must be >= 1, got {n_bootstrap!r}")
    # Written as a negated chained comparison so that NaN is rejected too.
    if not 0.0 <= ci <= 1.0:
        raise ValueError(f"ci must lie in [0, 1], got {ci!r}")

    rng = np.random.default_rng(12345)
    # One rng.choice call per resample, in order, so the random stream (and
    # therefore the result) is reproducible from run to run.
    means = np.sort(
        np.array(
            [rng.choice(arr, size=arr.size, replace=True).mean() for _ in range(n_bootstrap)],
            dtype=float,
        )
    )

    tail = (1.0 - ci) / 2.0
    low_idx = int(tail * n_bootstrap)
    high_idx = int((1.0 - tail) * n_bootstrap) - 1
    # Keep both ranks inside the array and never let the upper rank fall
    # below the lower one (happens for ci == 0 or very small n_bootstrap,
    # where the raw upper rank can even be -1).
    low_idx = min(max(low_idx, 0), n_bootstrap - 1)
    high_idx = min(max(high_idx, low_idx), n_bootstrap - 1)
    return (float(means[low_idx]), float(means[high_idx]))


def compute_slopes(df: pd.DataFrame) -> Dict[float, float]:
    """Compute the slope of log(V/V0) vs N for each p across seeds.

    For every non-zero value in the ``p`` column, a straight line
    ``y = m * N + b`` with ``y = log(V_over_V0 + 1e-12)`` is fitted
    separately for each value in the ``seed`` column, and the median of
    the fitted slopes ``m`` is reported.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with the columns ``p``, ``seed``, ``N`` and ``V_over_V0``.

    Returns
    -------
    dict
        Mapping from p to the median slope across seeds.  A p value is
        omitted when none of its seeds allows a line fit.

    Notes
    -----
    A (p, seed) group is skipped unless it contains at least two distinct
    ``N`` values: with fewer, the linear fit is under-determined and the
    resulting slope would be arbitrary.
    """
    slopes: Dict[float, float] = {}
    for p in sorted(df["p"].unique()):
        # Rows with p == 0 are skipped; no slope is fitted for them.
        if p == 0.0:
            continue
        sub_p = df[df["p"] == p]
        slope_list = []
        for seed in sub_p["seed"].unique():
            sub = sub_p[sub_p["seed"] == seed]
            N_vals = sub["N"].astype(float).values
            # A line needs at least two distinct abscissae; this also covers
            # groups with fewer than two rows.
            if np.unique(N_vals).size < 2:
                continue
            # The small offset keeps log() finite when V/V0 is exactly zero.
            y = np.log(sub["V_over_V0"].astype(float).values + 1e-12)
            # Fit y = m * N + b; only the slope is used.
            m, _ = np.polyfit(N_vals, y, 1)
            slope_list.append(m)
        if slope_list:
            slopes[p] = float(np.median(slope_list))
    return slopes


def compute_collapse_metrics(df: pd.DataFrame) -> Tuple[float, float]:
    """Score how well V/V0 follows the predicted law exp(-p*N).

    Rows with ``p <= 0`` are discarded first.  The remaining ``V_over_V0``
    values are compared against ``exp(-p * N)``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with the columns ``p``, ``N`` and ``V_over_V0``.

    Returns
    -------
    (R², RMSE) : tuple of floats
        Coefficient of determination and root-mean-square error of the
        measured values relative to the prediction.  Both are 0.0 when no
        row has ``p > 0``; R² is 0.0 when the total sum of squares of the
        measured values is not positive.
    """
    active = df[df["p"] > 0.0]
    if active.empty:
        return 0.0, 0.0

    exponent = (active["p"].values * active["N"].values).astype(float)
    observed = active["V_over_V0"].astype(float).values
    predicted = np.exp(-exponent)

    # Squared residuals feed both the R² numerator and the RMSE.
    sq_resid = (observed - predicted) ** 2
    total_ss = np.sum((observed - observed.mean()) ** 2)
    resid_ss = np.sum(sq_resid)

    # Without spread in the measurements R² is undefined; report 0.0.
    if total_ss > 0:
        r_squared = 1.0 - resid_ss / total_ss
    else:
        r_squared = 0.0

    root_mean_sq = float(np.sqrt(np.mean(sq_resid)))
    return (float(r_squared), root_mean_sq)